{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from nltk.corpus import stopwords\n",
    "from sklearn.metrics.pairwise import linear_kernel\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.decomposition import LatentDirichletAllocation\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import warnings\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "pd.options.display.max_columns = 30\n",
    "warnings.filterwarnings('ignore')\n",
    "plt.rcParams['font.sans-serif'] = ['SimHei']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>address</th>\n",
       "      <th>desc</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Hilton Garden Seattle Downtown</td>\n",
       "      <td>1821 Boren Avenue, Seattle Washington 98101 USA</td>\n",
       "      <td>Located on the southern tip of Lake Union, the...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sheraton Grand Seattle</td>\n",
       "      <td>1400 6th Avenue, Seattle, Washington 98101 USA</td>\n",
       "      <td>Located in the city's vibrant core, the Sherat...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Crowne Plaza Seattle Downtown</td>\n",
       "      <td>1113 6th Ave, Seattle, WA 98101</td>\n",
       "      <td>Located in the heart of downtown Seattle, the ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Kimpton Hotel Monaco Seattle</td>\n",
       "      <td>1101 4th Ave, Seattle, WA98101</td>\n",
       "      <td>What?s near our hotel downtown Seattle locatio...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>The Westin Seattle</td>\n",
       "      <td>1900 5th Avenue, Seattle, Washington 98101 USA</td>\n",
       "      <td>Situated amid incredible shopping and iconic a...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             name  \\\n",
       "0  Hilton Garden Seattle Downtown   \n",
       "1          Sheraton Grand Seattle   \n",
       "2   Crowne Plaza Seattle Downtown   \n",
       "3   Kimpton Hotel Monaco Seattle    \n",
       "4              The Westin Seattle   \n",
       "\n",
       "                                           address  \\\n",
       "0  1821 Boren Avenue, Seattle Washington 98101 USA   \n",
       "1   1400 6th Avenue, Seattle, Washington 98101 USA   \n",
       "2                  1113 6th Ave, Seattle, WA 98101   \n",
       "3                   1101 4th Ave, Seattle, WA98101   \n",
       "4   1900 5th Avenue, Seattle, Washington 98101 USA   \n",
       "\n",
       "                                                desc  \n",
       "0  Located on the southern tip of Lake Union, the...  \n",
       "1  Located in the city's vibrant core, the Sherat...  \n",
       "2  Located in the heart of downtown Seattle, the ...  \n",
       "3  What?s near our hotel downtown Seattle locatio...  \n",
       "4  Situated amid incredible shopping and iconic a...  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file = './Seattle_Hotels.csv'\n",
    "df = pd.read_csv(file, encoding='latin-1')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "152"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_desc(index):\n",
    "    desc = df[df.index == index][['name','desc']].values[0]\n",
    "    if len(desc) >  0:\n",
    "        print('name:',desc[0])\n",
    "        print('desc:',desc[1])\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name: W Seattle\n",
      "desc: Soak up the vibrant scene in the Living Room Bar and get in the mix with our live music and DJ series before heading to a memorable dinner at TRACE. Offering inspired seasonal fare in an award-winning atmosphere, it's a not-to-be-missed culinary experience in downtown Seattle. Work it all off the next morning at FIT®, our state-of-the-art fitness center before wandering out to explore many of the area's nearby attractions, including Pike Place Market, Pioneer Square and the Seattle Art Museum. As always, we've got you covered during your time at W Seattle with our signature Whatever/Whenever® service - your wish is truly our command.\n"
     ]
    }
   ],
   "source": [
    "print_desc(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([['W Seattle',\n",
       "        \"Soak up the vibrant scene in the Living Room Bar and get in the mix with our live music and DJ series before heading to a memorable dinner at TRACE. Offering inspired seasonal fare in an award-winning atmosphere, it's a not-to-be-missed culinary experience in downtown Seattle. Work it all off the next morning at FIT®, our state-of-the-art fitness center before wandering out to explore many of the area's nearby attractions, including Pike Place Market, Pioneer Square and the Seattle Art Museum. As always, we've got you covered during your time at W Seattle with our signature Whatever/Whenever® service - your wish is truly our command.\"]],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.index ==10][['name', 'desc']].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_top_n_words(corpus, n=1, k=None):\n",
    "    # 统计ngram词频矩阵\n",
    "    cv = CountVectorizer(ngram_range=(n, n), stop_words='english') \n",
    "    # 设置停用词，设为english将使用内置的英语停用词，设为一个list可自定义停用词，设为None不使用停用词，设为None且max_df∈[0.7, 1.0)将自动根据当前的语料库建立停用词表\n",
    "    cv_fit = cv.fit(corpus)\n",
    "    '''\n",
    "    print(cv_fit.get_features_names())\n",
    "    '''\n",
    "    cv_transform = cv_fit.transform(corpus)\n",
    "    sum_words = cv_transform.sum(axis=0)\n",
    "    words_freq = [(word, sum_words[0, idx]) for word, idx in cv_fit.vocabulary_.items()]\n",
    "    words_freq = sorted(words_freq, key= lambda x: x[1], reverse=True)\n",
    "    return words_freq[:k]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('pike place market', 85), ('seattle tacoma international', 21), ('tacoma international airport', 21), ('free wi fi', 19), ('washington state convention', 17), ('seattle art museum', 16), ('place market seattle', 16), ('state convention center', 15), ('high speed internet', 14), ('space needle pike', 12), ('needle pike place', 11), ('south lake union', 11), ('downtown seattle hotel', 10), ('sea tac airport', 10), ('home away home', 9), ('heart downtown seattle', 8), ('link light rail', 8), ('free high speed', 8), ('just minutes away', 8), ('24 hour fitness', 7)]\n"
     ]
    }
   ],
   "source": [
    "# df_desc = df['desc']\n",
    "common_words = get_top_n_words(df['desc'], 3, 20)\n",
    "print(common_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1ef9a625940>"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfwAAAEGCAYAAACAWydsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZxcVZ3+8c9DCEtA9h38EVEEWcO+yBIETEBEwYVd2QdHR3GcGTadiYKCyqgMMyjb0IAgO4ooskkIBAIG2YKCIIbNYVHZAxKS7++Pc4pUqu+t3EpXdVe6nvfrlVd33br3nu+93elT5y7PVURgZmZmw9tCQ12AmZmZdZ47fDMzsx7gDt/MzKwHuMM3MzPrAe7wzczMeoA7fDOzYUKSCqYtPBS1WPdxh2+2gJF0lKRL5zHPbyStX/d6lKQ/N5n/q5I+Uvd6ubyOAf+NkLSWpH3y9wu3uk5Ji+V17CLpC5J+KGmxgdZV0tazkhave/14/nqEpJMb5h0nabOGaQdIukTSeyUd0vDeHpI2zvtgZGPnrGRk2f6RtIaksfPYhK0l3dgw7VZJm85juUEn6fT6uiSdJ+mTBfNtJunD+fuFiz7UWDXu8M0WPG8Cr85jnteBv9e9fgtYUdLD+d9TkkbUvX8QcL+k+yTdClwFrA3cJunW3PlvCiDpXZL+Lmlq/veUpGfqXv+l1sFnLwPfkbQGsB8wWdLtDf9elrR2bQFJUyT9XtLDwD3AmcD+wPLAb4BlWt9tlbwJzJS0iqTRQOSvSwNLShotaYU871bA5xuWf5u0r98Evi3pvXXvrQDcBJwKTAXukfRG3mdvkLZzal5vkTWA/611ePkDwiIN82wITK69kLRMbvfe+pkk/Uv+cPNG3vfPStqp2Y4pkzvu1yU9J2n/uunfkPS8pJslLVew6OXAMXneVYDxwC0F870K/EjSksA/U/z783dJI+en/l7iQz1mXU7S6sB1pE58FrASsJSk24GRwBLAARFxv6QVgRWBUcB7Jb0YEX/Nq3omItYtWP+uwIiIeFrSOGCbgjKui4jaB4i3gP+LiM3z8v8CLBYRJ+XXfcDM2oIR8VdJXyENMH4CXBQRs/O8CwECbq1fBlgvIpZqaUcNgNJh79n55QjgYGAXYBXgHGBV0j79AKmjOhO4Frgxd7oRETNJ2zAbeJb04ea5WhsR0Sfphoh450iLpIcjYvP8da5RuKQNgDuB++smvwe4V9Jruc4/A5/I8/8c2AJ4Q9LHgZOApUgfjh7NnxNWiYglI+JU4NT8s5oSET+az/22X94n7wXWAX4u6cq87/YC3gd8JNfyj3XL3ZPrX0jSfaQPVIsBN0t6JCLe+cAYEX+Q9A3S7/r3gO9GTozLH1oDeDLvf2vCHb5Zl4uIZ4CNaq8lHQxsFxGHF8y+M+kP6/uBE4FrJB0DPAL8RdJU4F3AbXXLf405ne2GwD8A9acMjgUmMueIQQCr5A8cAKuT/nCPz6/XBn6aa12eNCq/D3gaOAD4nKRa57oQMNeh8qzpaC2P5mbVPji0wZGkoxyrALcBB0bEKZIej4hd8j5fLyL+rW6Z+4BtSR3ehZLeBtYk7Z+7SR9kRgIbStoQ+A7wT7n+R0lHPtbMP5Pa179HxAfz+mcAD0fEdnXb/QiwV0T8qWAb3gusGxEvSTqJ9HP+LLBDRDySl//jwHZTP08Bh0TEs8CzkoL0AWNv4L8j4hVJlwDHNyy3WkSsmmvqA34cETfloyk/rc2UR/7vAm6NiBcl/RvwsdwOpA8NX2jzNg1b7vDNhpGIuAS4RNJE4HBgUdKh8FPrZluPdJQASZ8gHX5+Pb83gtTpv6tu/lUbmpkNPFvriEpG+DWbkzr5vUmjywuBCxvrzh9Kat8LWDQfzi+zCLAP6fD+gEXEGcAZkqYD20bE2/mt2uH1S6n7e5mPCEStIwU2ykcr/kjqnA5uaOJx4AnS4ej3AMoj+9/VjfA3l/RY3TKzSR8e6s0ErpVU+4A2KyI2q5u/3o4NNRbN04+kfyV9MHkDODoirss/01GkIwhvAZ+NiCkRcXvdclsCf4uI5/Lpm3PIBUiaKWmJiKj9no3KH3AARgPbSHqV9HOtr3EXYFfSUYIVIuI7pA9OjTXPa7MMd/hmXU3SUcBhpD+ytT/+K5E6xNofW5H+GP8K+CqwJWmk+hPSH90gnVuumZWXgdQRHQuclUeh7wYmFJTySUmTI+L3pA8FlUTE9cD1knbI24DSxYOPkz5U3BkRRzUstgyp4+h3+mGQHCPpTuBq4MF8Hv4vwJuSZkfEVqSR89ck/aV2agM4kDkfpC4H/iciJgLkju4oSd+OiBmSZilds/BCXnYxSYuSfjY1Ixte14yLiKcrbMfdwDmSzo2Iw/K0ph2+pF3ytm1M+qD3a0kb57ffDawLfBjoy9/XOwX4dv5+BPBK3XszSIftax3+63WnhPooGeFHxI+BH+ffGfJ5/D8DD5CuTeiLiFOabZPN4Q7frIvlc6vvnF+VtC1wEekQ/yu1c5l1738WOIr0f3t/0nnR/yR9EKhZEpiU139vXg7S6YA1gZsLStmZdPj996SjBqvWjdBWJh3S/3h+PZq6P9p1ap3NnyNiO0lbkzrJRqsA/6d0l8FdwGMN7/8/YN+IuKFg2fmidIfCMbntu0mnH+6NiLGSfgRcAkwHfgwQEedKupn0syB3VN/O/9YinWu+WtL2EfFoXVNrArXD8V8Dfpi//znpYrv6n+dKwN8l7UE6Bw7psP31dSP8z0TEA3XL3CZpVt6OYyPidkmnShqXP3zNy26kzvdF4EVJdwHb5/d+kq/j+Lmkn0haJiJeytv/OdLo/Kw874vMfWHl4sz9YWPZfBQK0geHTSS9SPp9LVJb9k3gd/n3Z1/ShYxWkTt8swWEpCWAM4ATIuJlST+WdFdEnF4324URcX7+YxoR8SCpg0fSNNK5/5dKmugjHYI/FXiN9Af8LdJFgf/KnMPnq5Mu9Noxr7fZIf15blbBtI1JF6q9BUyNiLFzLSD9mLkv8GuH24Bfkz7Q3EL60LJZ/lDz/4CdSNcwvNKwXEhaGfgl8C3SOe21IuJOSd8hfUDYKte9DumairVJR2TuBBbP1wfck/8dW7fulYBnSEdCJkbE0fUN55/xog31bF93Dr/mP4HPKN198VqFfREN39de1/+sRO6EJY0B/p10KqTWMU8lXfw5WdIo0sWGtYtHiYj6Wx/7yCP8CrU18rH8FrjDN1sA5MO9l5KucL84Tz6W9Af16Yi4GqDhIraVJV3BnMP57wVuySPAEcBJEXFlbeb8QeAmYExucyowPiIaO4mtSJ1Tqz6idJvYSnndSwC3F8y3BzCF/uev673zntK92ycBR9UOobcqIl7J66qffE/ZCL/BNsBlEXG6pL2Zc7vz94HL6uY7mXSF+XP5orOnCtZVX8D6pKMBbxfMV9PsvZqrgWtII+7GDyyNrgO+J+lM0lGCrUgXcX4U2D9PHw/8KV+Qtzzp6MQ/NlxIeAUwUdIDpKv1fx0RMyXtBvwP+fROtirwIUkz8muRfjd2iYh3ruOQdDjprocN8u/PssD/Vth+y9zhm3U5pfu+ryWNCpfIf3RXJHUsvwPOl/TXiJiUjwJsRhqFPwVsHRFv5vVMA3bKI8CRzN25NI4UoTynYx/mHGKGdK554dzGxqQPFrU/3rUr6pcFTgA+DiwdEf+ldG/2Wnm22vIbAh8D/o10ZKI2yq73Hub+Q78M6ZawJUvqbUWle7nzhYV7kY5GbAlcoXR73svkUxD5YrUX83nn3UmHxj+TV7Esc+/DmhF5/QuR9vMxpO3aT9J2DfOuw9w/o5HMfUj/a3n6bNL+HU//0yNzyefRLySdI38TODR/QAF4FHiI9CHjoLzIgcBqwA8l1U5P7BkRd0v6R9JRj2dIHxqIiOuY8zMnb+uFwAUR0RgYVHt/JdKHgk/l9r4SEWfmCwNrF5e6L6vAO8ms+71Iuq3uQVKAysOk8+C1e5EPJZ1Hh/TH+EvAmRHxZMN6FiX9gXyp4J7lJWrfKF2BPoXUkc6onyn/kV04IibXTX6IOR3lAaTOon7kvhJwB/DpiHgV+C+AiPibUojPpaRD6M/l9g6KiD/nc/j3lBzSf6feiDhHKYHuDwyApG8Dt0TE27mT3qTuwsiTSPuvdmThk6TrGlYjHZH4Oul2xCVIAT3H5X0yG/g0cD1weN3Rkr9FxJiCGp7I3y5Pupf/prz8T0oO6dcH73yXdErn7/mD1xt5+sqkUfUU4Cv16yi4m4CI+G5eV6ObIuLAhnlPA04rmJeI+Dlp9D8vo2j+QWt10hGDA/Pv7Zl5/U8rpfBdQ93pAiunhmt+zMyQtBrwXEQUXSXe7rbGka4JeLlh+sLAUhHxt3ksvzrp4rWi+/lbqUONF0HOY/6F627fa3xvIdIgf8B/YPN+WCgi3prnzB2Sz7NPjIi+oaqhSL4+YFvgjoiYMa/5e507fDMzsx7gLH0zM7Me4A7fzMysB/iiPetKK6ywQowePXqoyzAzW6Dcc889f4mIFYvec4dvXWn06NFMndp4N5aZmTVTd6dHPz6kb2Zm1gM8wm8DST+o3SOb7wceGxET2txGHzAhIqa3c70D1cr25rzx0VXS0B585mVGH/uLAVZnZrZgmX7KRzq2bo/w26AxEMNKjQbGDnENZmY9yR1+RZImSLpO0q2SrshhGLX3JhbMv6qk2yStruTsumULHy8q6WBJt+R/10taukk935d0l6Sbc0Qpkk6QdIekOyWtl6d9XdLtzdYnqU/S5Xl9V0q6SNIISRdLmizpZzkeFUkTJX05Z2RX3l5JXwJ+AByc11F4UYmZmXWGO/zW3JafEPYcKe+7zJKkh1X8Q0Q8k+cdmZd9Emh2zObRiNiJFKl5eNEMObP7UdJDO6YAu0raiPQktG2Bo4EtlZ5itUNEbAf8gvSc6zK1B1p8hfTc6+XzMjuSHrixaZ5vVVKC2EatbG+O4Dya9PzqsRHxAg0kHSlpqqSps2a83Pi2mZkNgDv81tSeEPYA6fB0mcNJT7naIL9eB9gmHwnYgTm55/PbRpA64KtJjzNdnPRM6akAEXEXcEFud63c7v6kTrzMdGBWvkZgNunxo3sAl5MedlF7nOXL5Cz0OgPZ3jkbFXFWRGweEZuPGFV6cMPMzOaDO/zWbJm/bkLzp079CDgU+HI+FP4IcEl+CMjRpCecDaSNHUij7I+RPhhAeqDKZgCSPghcmNudmNs9HLi7SbuN9gam5a/P1E2f0fAIVqi+vW+QHpRRe9qYmZkNEl+l35ot8qj1WdLjSsu8GRGvSboc+Bzw36Rngd9KGp0f2GTZd0u6hfRoyk+XzPMA8G1Jd5CepPZSRPTlc/eTSaPzIyPiD5Keyu0uQn5EZUWTSc/y3p3UUa/eZN6q23svcLyk20inEC4pW+GGqy/N1A5erWpm1mv88JyKJE0gjZYndrCNgwG67YlUQ2HzzTcPB++YmbVG0j0RsXnRex7hV9Tu++pL2ujrdBtmZtabfA7fzMysB7jDNzMz6wFtP6Sf7/0mIu5r97rno5ZVgIMj4pR5zDcWmN7J2NqiNqrW12I7tavyW12u7bUUtFH5d8PRuvOvk9GcZrbg6sQIf0z+N+Qi4tmKHdhYmt9X3w792mihvo4bpFq65nfDzKzXtHWEL+lkYK/8/UERsbOkJYErgCWAxyLiEEmLAX3AGsBLpNvPZuVpqwFPA4cAdwLPkxLgVgHOIyW6XUa63WtSRJzQpJ7RpAfOHJxf9wGPA7sCI4CdgTOAnYCPS3ooIg6QtHKuZWng5xFxcl7XN3Mt5O0oWt+yjfVJOq+xjZL6lgPOB5YB7omIo6u20eTH0rhPVitatqCWsu1dihSkc29EfEHSojT83CLirXz74m+AjSJiXNHvRtWazcxs4No6wo+I44BTgFPq/qCvCpwO7AKMzp3pkcD9OfL1SlJC2xHAtBzH+igpyGUU8ClgI1JS3Fak+8GPBXYDPjofZS4ZEduTgmo2iYhDSB3W0bWOGDgOuDTH1H5cUi2h7qPAmXmZwvUV1VfSRpHjSYE12wNLSxpftY0WtLJs0fZeEREfBN4jaTOKf24AWwN3RsQ4KP3dmIujdc3MOmcwbsubSUp5OwRYjjkxsFfm9/vy14OBq/L3U0gd0nM50OUJ0hEAAW8D/wG8BrxrPuo5P399khRGU6QWDXsw6cjEasCrwA0RMWUe63tjAPWtR0qtA7gL+EAH2mhl/xVtb2P073r0/7lB+hBwFS2IiLOAswAWXXVtB0SYmbVRJzr8N8iZ7Tk+9TDSIf3LgFvzPA8DWwA3k0a1zwMPkUaFN+WvDwEbF6z/n4GTgfuZEyvbitdLaq6PfH0E+FlE3CLpQOBvwEhSJzmv9ZXVN1cbUZx4VNsHj+WvF5P2QdU2qmhl2aLt3ZK0f8YAl5JOtTT+3MqWnet3o2QfAE7aMzNrt05ctHcjsHeOeN0+vz4O+HV+f3XgbGDTfJ53U1Lu+znA+pImAWszZ+Tf6FrSKPgaYIakZpGvVV0JHCtpCvBe0qHnf8nbMJ70dLyqyuprbKPIycC+km4nxeXe0GIbA6mvqj3yfnk4X21f9ecG/X83zMxskDha1yrLF+1N6OTtizWO1jUza52jda0talfwm5nZgsdJe2ZmZj3AHb6ZmVkP6KpD+kXRq2VxrAOIkHX0b8U2hjL619G688/RumZWpNtG+EXRq+2OY+2aeFdH/5qZ2WBpd7Tu4sDlpPjVv5JS8hYBLgBWAh6MiM+XxO0WxfJWimOVNKqxjZL5HP3b5dG/ZmbWGe0e4a8HzI6IHUid35KkGN1pedqqkjaiIG63KHq1ShxrVtRGP47+7e7oX0frmpl1TrvP4f8WmCbpBlKn+CtSTO22+RzxMqTO4Pf0j9sdiKI2qibQOfq3uo5G/zpa18ysc9rd4W8MTI6I4yVdTEpTewS4OyLOk7QHqVMoituF4ujVKnGsRW2UcfRvd0f/Ao7WNTNrt3Yf0p8OfFHSHaRz2lNJMbq75ejVo4CnKI7bheLo1SpxrEVtlHH0b3dH/5qZWQc4Wte6kqN1zcxa1yxat9tuyzMzM7MOcIdvZmbWA9zhm5mZ9YCuita16iQtDVxN+tB2WkRcPYht/yAijp7HPIX1VVkWHK3bjKNzzWx+uMNfcG0M3BERXx3shqt02JTUV3FZMzNrMx/SXwBJ+hLwX8CBkiZKWjFPnyjpu5Kuz69HSbpC0iRJ/1M2raSNqZJWkvScpFUl/bLuvYnzU9+8lnXSnplZ57jDXwBFxGnA0UBfRIyNiBfyW1sDd0bEuPy6KHK4UgwxKR9/HHA38GFSiuJA65vXcmdFxOYRsfmIUUtXbc7MzCpwhz+8TIuIq+perwPslUfVa5FChYqmFfkt6aFBvyA9L+CeDtVsZmaDwOfwh5fGaN2iyOGqMcT3Al8Dvkx6uFDhEwg7xdG6Zmbt5RH+8FYUOVw1hvi3wBOkQ/vPR8QTg1CvmZl1iKN1rSs5WtfMrHWO1jUzM+tx7vDNzMx6gDt8MzOzHtDVV+lLmgBMjIiJBe9VimgtWG4MQETcN+ACB9jGYNTSbmU1z+/Po0yvR+s6PtfM2m2BHeEPoHMZk/91UtU2BqOWdius2ZG5ZmbdreMjfElTgd2BB0kdxbmkQJcrgCWAxyLiEEmLA5cDSwF/JYW9AOwq6Rt5+viIeDavd2JEjM3fTwBGAtvX5gNeBq4ClgP+CEwD3gXslZc5KCJ2lrQo0AesBjwNHAIc37i+WrsN21ZU84kFbSxZsL0nF8w3CrgAWAl4MCIK732XtFiueQ3gpbw/Z1XZDuAw4KGI+Kmk44DHSOE6c7Vbsk+/1FhzXU31P4/52qeSjiQlATJiqRUxM7P2GYwRflFE66qkMJddgNGSVgbWA2bnyNfzgCXz8u/L064CPtSkncb51iV1Ntvl974VEccBpwCn1HVWR5AS6nYEHgUObaHdfjWXtNFve0vmqxp7eyRwf0RsB1wJbNDCdlwO7Jbf2wH4ZZN251q2pOYi87VPHa1rZtY5g9HhF0W0zgQOBy4ijcAXz/NNk3QD6QPCjLz8Bfnrk8AiTdppnO8ZYDNgEnBak+XWA+7K308BPtBCu2U1Nyra3iJVY2/XJX2AgjSS/k3V7YiIPwBrSFoKeCkiXm/SbtV932gg+9TMzDpgMC7aK4poPZx0iPsy4NY838bA5Ig4XtLFpEO/AK9XbKdxvvHAiQXPiX8DWB5AkoCHSA+duSl/fYh0KLpKu0U131zQxmH0396iWqrG3j4MbJHbOh54vsXtuJv0cJtr8uuidrcqWXaumqM4uWkg+xRwtK6ZWbsN1gi/MaL1RuA44Nd5ntWB6cAXJd0BrAIMNGbtXuB0Sb+WdImkDfL0G4G9JU0mddDnAOvnqNm1SSPmqspqbmyjaHuL5qsae3s2sGkekW8KXNjidlxO6vCvrVtflXaLai4ykH1qZmYdMGyjdSUdAexHOpw+Ezi16PY+606O1jUza12zaN2uvg9/ICLibNLI1czMrOctsPfhm5mZWXXDdoRvC7ZeS9pzsp6ZdZpH+B0gaUwtgrbZtDx94mDXYmZmvccdfmcUxc8OVYzughjfa2ZmbeZD+gVKInMXoX/8bNXI3H7TStqtGq3br908fSIphGejiBjXQrv3kO7lf4t0e+F5pPvnJ0TE9NpDjIDfAZeSInIfioh/yCmJfcDSwM8j4mRJfQXL/i/wZ1Ig0jrA8RHxy4Y6HK1rZtYhHuEXK4r5LYqfrRSZ20IkbdVo3aJoYkid9J0RMa7FdkeRPtRsBOxPCt0psj3pg8h2wCRJC5HyBS6NiG2Bj0tavmRZAZ8FVga+QgoOmoujdc3MOscdfrGiyNyi+NmqkblVVY3WLWt3WkRcNR/tPhcRr5ECkmaROud6tfVfB4yQdCOwbkTMzjV/Lte8BClRr2jZ2rqnl7RhZmYd5EP6xYoic4viZytF5ub42SqRtFWjdcvafa1g3irtFnkLWFHSU8CupM5+G+DCiJgiaXI+dP8I8LOIuEXSgcDfSpZtiaN1zczayyP8YtPpH5lbFD9bNTK3bFqjqhG3Ze2WzTuvdotcAnwb+CHpEbqQ4pG/I+lO0jn/J0inDP4lr3888FzJsmZmNoSGbbSuLdgcrWtm1rpm0boe4ZuZmfUAd/hmZmY9wBftZZImRsTYoa6jkaSDASKir8K8Y/K893Wwnn5tlLU7kH3qaF0zs/byCH94GYxUvW5KETQzs4p6aoSfU9+2IgXNvADsGxFvF8w3ArgQWBP4C/BJ4G3gv0kd20xgX9IV6WcB78/r2yciZhWsbyIpZa4WkHM38E0qpOXVrWP93P6epPvYG1P/qqbqdXWKoJmZdUYvjvBvi4gdSZ31x0rmWR74BbAj8AqwKfBRYOGI+CBwKrBZXn5kXt+TQLPjsscD787LbE3FtLxsVVLIzn4R8SoFiXwtpOp1bYqgpCMlTZU0ddaMl5usyszMWtWLHf49+esDwOiSeWYCe5BGwmuR0uLWJY3MiYhrSWEy6wDb5FH5DswZwfcTEdOBP+dEO9FaWt4XgKdJRxygeiJfka5NEXS0rplZ5/Rih79l/roJ5aEwewPT8tdn8rSHyfnvkg4ATiSlzF2SL0w7mvRwmapqaXn7Aa/XTS9KyzsR+Fz+Sm73B7ndrzInke8N0ukKJJVF19ZSBD8MLMucFMHG9ZXVV9RGlXbLajYzs0HQU+fwsy3yKPNZ4NqSeSaTYmt3J3VmqwMXMycFbwZwEOkc+Eck3QoEcGALddwInEFK1CO3Mb1k3jcj4ilJD0vak5TId56kQ0inHPavW+dl+QPJccCkgnVNB74t6QTgTVKK4JSC9ZXVV9RGlXbLai7kaF0zs/bqqaS92qNaI2LiEJdi8+CkPTOz1jVL2uupEX5ETBjqGszMzIZCL57DNzMz6znu8M3MzHrAsDmkXzVWdjDiZ9utSXTtDyLi6KGpaq462r5Ph3u0rqN0zWywDacRftV41wUxBraw5m7o7LMFcZ+amfWUBW6EXxINeyL9I1+rRsNWinyVtBjQB6wBvAR8mhRx2wesRgrGOYSUqDeSdH/7UsB40j3tD0XETyUdR7r//xeN7ea7CBqX/VJjzXU1vfNwGkmLVqklIp6tuG3Mb31l+7QsNtjMzDpvQRzh94uGLYl3rRQNS4XI17r57o+I7YArgQ2AI/KyOwKPAofmed+X13cV8CHSB5Td8ns7AL9s0u5cy7YQXVu1lqrbNpD6ypYtig1+h6N1zcw6Z0Hs8IuiYYtUjYatGlP7TrQuaTT8G9KHj7vytCnAB/L3F+SvTwKLRMQfgDUkLQW8FBGvN2l3rmVLailSqZYWtm0g9ZUtWxQb/A5H65qZdc6C2OEXRcNC/3jXqtGwVSNf34nWJR0qPxx4iDRqJX99KH//Ov3dTYrfvSa/Lmu3aNkq0bWt1NKoaNsGUl/ZskWxwWZmNggWuHP4FEfDQv9416rRsFUjX88Gzs+j1r8CB5DidPty3O5TwLdIHWaRy4HbmfMAnFaiZqtE157TQi1Vtm3EAOprKUa3iKN1zczaq6eidW3B4WhdM7PWNYvWXRAP6ZuZmVmL3OGbmZn1AHf4ZmZmPWBBvGivrZo9Mnde0bXd8LhdSQcDRERfk3nmGcEraTQwupPb0kobwzFa13G6ZjaUPMJvoouiawek4naMBsZ2tpJBacPMzAr0/Ag/21XSN2iIn22Irl2clC63HPBHYFqzZeuVxAF/FdiKdP/6C8C+pKjes4D352n7ALMLpi0EXAYsQwoYurjZxjVsx8GkLIONgVVIMbo7k6J4l5E0Ntf3l8Z2I2JWvnXvZ8AhEbFRyfoeKqj5C41tRMQLzeo2M7P28Qg/qRI/uy4po367PP+3Wli2Xxxwnn5bjsJ9DvhY/jcyT3sS+EjJtL2BJyJiJ+CJ+djeLUgphacAe0bEaaRQoL6IGJs74qJ2IUUWR0RsVLa+omVL2piLo3XNzDrHI/ykSlzsM8BmpNCb01pctj4O+FHgV3n6PfnrA6TD3QsD2+RR9JLA70lHFBqnLQ/cn5edn5vVfxIRMyU9SYq+LbJOQbsALwP/NY/1lS3bVA3eHNMAAB00SURBVEScRToywKKrru2ACDOzNnKHn1SJnx0PnBgRV8/HsrU44OMlXcycOOAtgeuBTYDrAAGXRMR/SNqOlOS3YsG0NYFN8zo2IWXnt6JqPG5juwAzImL2PNZXtuxcbUST1Ccn7ZmZtZcP6Vd3L3C6pF9LukTSBi0sOx34oqQ7SOe5a6PyLfIoeBngWlLO/mqSbgVOIh2uL5p2JfD+vOz7B7ph2b3AOpJuI51zL2q3qrJlG9swM7NB4mjdiiQdQXoQz8z879SB3MLWDbf0dTNH65qZta5ZtK4P6VcUEWeTHgrTrvVNaNe6zMzM5sWH9M3MzHqAO3wzM7Me4EP6PaIxgrc+jKdN638nvjdH6E6IiIPnd30LYrSuo3PNrJtVGuFLWkzS5vn7wySV3W9uPWq4xBCbmQ1XVQ/pXwasn79fGbioM+V0L0mLS7pW0iRJV0taWNIESddJulXSFXnaCEkXS5os6WeSRir5nzxtoqRV8rSz65YdUdLuwZK+n28H/J2kDYqWLZk2Mtd6C7B/k21rpZZb8r/rJS1d997EgvmXzPNukF9/XdLtjcuamVnnVe3wl42I8wFypOwKnSupa1WNx10e+AWwI/AKKSDno8DCEfFB4FRSYl9ZdG2ReUbXlkyrGsHbSi2P5vXdBBzeZL6FSc8PODkipkkaA+wQEduR9s9nGxdwtK6ZWedU7fCflnSMpJ0kHQM838miulR9PO44YEae3hiPOxPYg9TZrQUsTsrhvxsgIq4lperVx8/uQDpyUuYnETGTOfG9RcsWTXsP1SJ4W6mlcXvLfIL0AJ4xdW2sldvYn/TBaC4RcVZEbB4Rm48Y5QMAZmbtVPWivYOBI4FPknLR+43OekDVeNy9SU/S+xZwaZ7nYWB34FxJB5COFvyG4vjZIlWiawcSwVsWhVtkS+DMvL7Hmsz3M9LvySRJ5+Q2JkbEZ/Mh/jWbLOtoXTOzNqva4c8EzidloW8HjATe7FRRXWo68G1JJ5C2fSqp06/F4z5Lisddm3TNw+6k/bU66fG1u0maRDoycBDpMbkfyfGzARzYQi3XFCx7Z8G0qcC+ub7ZlHf4Resr8+58TcCbpEfhlnkzImZL+h7w1Yj4Z0lP5TYWAf6h4raamVkbVIrWlXQl6bz1ONLT21aOiF06XFvX67V43MZb+zrJ0bpmZq1rR7Tu8hFxraQvRMR4SZPbWN8Cq9ficQejozczs86oetHeq5J+CtwjaXfg1Q7WZGZmZm1WdYT/KWC9iPitpI3xo03NzMwWKJU6/Ih4U9JbksaRbg2b1dmyhq/G8/7qbMTtWGBsu089SOojRedOb+d66y0I0bqO0jWzBUnVaN3Tga8DJ5PuLb+4k0XZ/HPErZmZFal6Dn/DiPgE8HJE/ALouVSUHKP7TaVo3ftyPO6oHEU7SdL/5PmKpi0r6aZ8O9vYJm30W7ZJLXNF+ta9N7Fg/lUl3SZp9XbE6BbM+31Jd0m6WdJyedoJku6QdKek9fI0R+uamQ2Rqh3+C5L+HVhG0mdJ95z3ovflaN2rgA+Rwoim5WmrStqoybRrcyTtzCbrL1q2TGOkb5klSffZ/0NEPEObY3SVHqT0KLAN6T7/XXPd20XEtsDRwJaO1jUzG1pNO3xJO0jaATgXeBlYlDS67+t8aV3pgvy1PuJ2rzyqXosUslM0rZWI28Zly1SNuD0c+BOwQV0b7YzRDVKa39XA5syJEp4KEBF3kfabo3XNzIbQvC7a2yl/3RF4G/gpMJ501f51HayrWxVF3N4dEedJ2oP0QaBo2pOkpw3eQsqWv75k/UXLlmmM9C3zI+Ak4EZJV9P+GN0dgIiIj0n6bp72MCmOGUkfBP4R+C6O1jUzGzJNO/yI+DqApJsj4sO16flctMHZwHmSDiE9GW//kmlnAZdL+iQplriV9ZVpjPQt82ZEvCbpcuBzwH/T3hjdB0iRw3cALwIvRURfPnc/mXQK48iI+IOjdc3Mhk7VaN0bgZ+TDkuvD3wyIj7U4dqsxGBE+g5mjG4RR+uambWuHdG6nyJdULYv6TDzp9pUm82HwYj0dYyumdnwUjV45yXgOx2uxczMzDqk6m15ZmZmtgCrekjfOiDfm05E3JdfT6CFc/PziuUdzPPw9ZG+7eBoXTOz9vIIf2iNyf8WeI70NTPrbh7hD4CkxYHLgaWAv5IuZhxBCiZaDXgaOAQ4njxyr426ySE7eT0HRcTOefqukr6R1zk+IiqlGkpaErgCWAJ4LCIOqXtvfdLteHuSHnx0AbAS8GBEfL5kfRMKah5Nuq1w+8b66o82SFq0ZB8ULlvX5pGki0MZsdSKVTbbzMwq8gh/YNYDZuco3PNIMbZHkOJxdyRFzh5atGBEHAecApxS19lD//jeqlYFTgd2AUZLWrlu+kXAfhHxKq3F9xapUl/ZPmi6rJP2zMw6xx3+wPwWmCbpBmAcMIP0IeCu/P4U4AMNyyw+j3U2xvdWNZMUo3sRsFxdO18gjbJryXatxPfW1Ndcpb6yfTC/22ZmZgPkQ/oDszEwOSKOl3Qx6XD1Q8DWpAfObJ1frwDUjlGPJ+XOA7xBzpSXpDytMb63qsNIh/QvA26tm34i6WE15wIfpnp871slNVepr2gfrFZxWcDRumZm7eYR/sBMB76YY2VXIT0w5hxgfUmTgLVJ57KvAf5J0o9I5/prbgT2zhG02w+wlhuB44Bf59e1kfubEfEU8LCkPUnxvbvl+o4CnipZX1nNVRTtAzMzG0KVonXNBpujdc3MWtcsWtcjfDMzsx7gDt/MzKwHuMM3MzPrAb5KfwHQGMHbwXbaGo9bsP7RwOgq0cHdGq3rOF0zW1B5hL9gGJQI3kGIxx0NjO1wG2ZmVsAj/DYridtdhIY422ZRuA3rO5mGCN6iZSUtRrr9bQ3gJeDTETGjYH3NInjnehhPDuf5DbBRRIzLcbtbAaOAF4B9I+JtSaeTPpC8BHwmIl7My/4MOCQiNpL0JVLE7jKSxgKfiogXGmpztK6ZWYd4hN9+RXG7RXG2ZVG4cymJ4C1a9kjg/ojYDrgS2KCkvkrtZlsDd0bEuLppt+XI3OeAj+XwnsUiYvvc7jF17UREbJS34zTgaKAvIsY2dvZ5Hkfrmpl1iDv89iuK2y2Ksy2Lwq2iaNl1gbvz+32kkXnVZctMi4irGqbdk78+QDpEXxaj+zLwX/PcEjMzGxQ+pN9+RXG7RXG2ZVG4RRojeIuWfRjYAriZ9GS650mpeo1aafe1gmlbAtcDmwDXAW+TTjmcw5wYXYAZETG7YDtG1bYjmqQ+OVrXzKy9PMJvv+n0j9stirMti8It0hjBW7Ts2cCm+SjCpsCFTdZVtd0iW+Q2lgGujYhfAG9Iuh34BPDdJsveC6wj6TZgnxbbNTOzAXC0rlWWL9qbWOW2uoFytK6ZWeuaRev6kL5VFhEThroGMzObPz6kb2Zm1gM8wreu5KQ9M7P2GnYjfEkTcrDLQNczphZp242K6iurOV9k1+r6K+/HVvbV/NRiZmYDN+w6/DYalDjbASiqb6hq7vZ9ZWbW84bFIX1Jy5LibEcAAiZKWpQUQLMa8DQp1vUOYHfgQVIHdS4prGYk6Xa3pYDxwJfoH2c73+uLiGcLap7vCN6SuN1+00r21ajGNprs2l0lfaNuv7xYsA++XlBLK23U1+ZoXTOzDhkuI/wjSfeE70RKkgM4gpQUtyPwKHAo8Dgp/e5u4MOkVDyA9+XY26uAD5XE2c73+kpqnu8I3qL6Smou21eNbZRp3I5++6Ck3VbaeIejdc3MOme4dPjvAe7P39du3i6KfP0t8GngF6QRdS0m9oL89UnSKLtIu9c3GBG8RYraKNO4HWUxugNpw8zMBsGwOKRP6pDWB24hHVq/nhTxujVwE3MiX58AvgZ8mTRq/jwpCvf1gnU2xtkOdH2NBhrBO1d9Oaa2aFqjojbKNG5H0T7oV0uLbRRytK6ZWXsNlxH+WcAn8ohyqTztHGD9HGe7Nunc829JnfTjwPMR8USTdTbG2Q50fY2mM7AI3sb6yqY1KmqjqqJ9UNTuQNowM7MOcLSudSVH65qZta5ZtO5wGeGbmZlZE+7wzczMesBwuWivqw3mU+Y6SVIfMCEipne6rW6J1nWUrpkNFx7hm5mZ9QB3+INnV0mTJN0naRVJi0r6iaRbJV0kaRFJ90i6TtLPJN0l6ShJK+dpd0g6rmzlklaTdLuk2yR9U9LCku6UtJWkuyVtI+mMxvnysudL2jp/31f7vsRn8rJ3SFpc0nKSfp6n/SCv4/HcxqV5e3eX9H5Jt+Tt+kxb96yZmc2TO/zBM8/UOmAUKcBnI2B/YCvSLXmXRsS2wMclLV+y/tWBY4HdgI9GxNvAW3ldTwIbkm4jnGu+vOwFwAGSFgE+EBFTmmzHkhGxPfAwsAlwPHBJnra0pPGkeOPPAisDXwG2AL5DiuHdHjgm368/F0lHSpoqaeqsGS83KcHMzFrlDn/wVEmtey4iXiPd2z+L1HGuA3wuZwwsQcqxL/I2qSM/B3hXnvZ7Utb/RGBPUhJg0Xy3ANsAHwGumcd2nN9kO+7K21Grf3rDdnwduIH0zINlGlfsaF0zs85xhz94ylLrYO7UukaPAMdGxFhSZv3fSub7Z+BkUgxvLVzhXmDdvO6dgGlF80XEbFJ4zqnAjzu4HQfn7fhv0tEHMzMbJL5Kf+icA/TlNLqngG+RDuM3OgU4V9JJwJ+AS0rWdy3wI+AFYIak1UmH8P8E/AF4OCJmSuo3X0Q8Q3py39YtpgVC+vBwgaSjgKkRcYOk4wvmOxY4Oz/9b3JENI0fdrSumVl7OWnPkLQz6Rz7CRHxq6GuB5y0Z2Y2P5ol7XmEb0TEzcBmQ12HmZl1js/hm5mZ9QB3+GZmZj3Ah/Q7RNLEfEV6/bQfRMTRTZY5GCAi+jrYxgQKYn4ljclt31el7Yr19aulqOYiQxWt6yhdMxuuPMIfRM064i5oY0z+1w21mJlZm3mEP4jqR7d5pD2SlDy3FDC+br71Sfeq7xkRrw6gjcVJyX7LAX8k3YcPKeb3G3XtfgnYKy9zUETs3Gz9wG+AjSJiXL7N7gpSKNBjEXFIUS1mZja0PMIfWo1xuwCrAhcB+7Xa2RdYF3ga2C639a2idiPiONL9/qc06+yzrYE7I2JcXb2nA7sAoyWtPL/FOlrXzKxz3OEPrca4XYAvkDrpNduw/mdIt9tNAk6bR7tVTYuIq+pezySl9l1EOpKw+PyV6mhdM7NOcoc/tIrS5k4EPpe/DtR44MSI2CYiLppHu2+QHt5D0YNt6rzW8Pow0iH9/UrWa2ZmXcDn8LvPmxHxlKSHJe0ZEfN6mE0z9wLXSfon4HngpCbz3ghcJukA0hP6JlVs40bgDOCo/Hp10kNzBsTRumZm7eVo3WFM0hGkkffM/O/UxtvxupWjdc3MWudo3R4VEWcDZw91HWZmNvR8Dt/MzKwHuMM3MzPrAT6k36IcQTumavxt3XJjgekRMX2w2szLfpwUpftSq8sOJUfrmpm1l0f4LYqI++an4wXGAqMHuU2AjwPLzOeyZmY2TLjDb5GksTkWF0l9kkbn7yfk91aSdIuk2yWdmd87DzgY+IGki4rXDJIez8tdKuk+Sbs3tplfT5T0r5LukHSNkol17/dJGi3pOtK9+JdI+s/83vtzfXdJ+kyetm5ud4qkrzWpb0NJd0uaLOkfJa0h6aeS9pF0maT9JP1b43x52VskrZ6/v7n2vZmZDQ53+O23PfBgRGwHTJK0UM6X7wOOjogDmiwr4LPAysBXgC2azPt6RGwLLEmKt+0nInYDfgXsGxFfyZO/A3w913lMDtn5CHBVRGxN83voVwYOBfYEDomIp4HlgfWA50hRvr9tnC8v+2NgX0mrAG9FxDP9Nt7RumZmHeMOv31qkbLXASMk3QisGxGzW1jHE8AsUqc7i/QBoMz5+WtRPG6zeNt1SB3+DcAI0uH+C4GNcs3NMm1F+sDwfeZc//EKqYN/Gdic1OEXzXc56fTCp0idfz+O1jUz6xxftDcwbwErSnoK2JXU2W8DXBgRU/Ih7b6I+CMN0bUxwMSjiGiMsX1L0orADNLoveaddrNHgC9HxJ8kfSFvw67AN4HHgcclnRkRMwua/XdgX9KHkRvytAdIj9W9C/h0RPxNUr/5IuKVvJ+OID2Apykn7ZmZtZc7/NYtDvw9f38J8G3gsfwPUqd5vqSRwLOkUTvAlcD/5s7wwLr52+UC0qj/SeY8Brc2/excz47Asfn1ksDkiHhd0uN52RHAdSWdPaSn6/2S9HCfkZIWI43olwb+ADxYNl9EvEnK3P97RMxo10abmVk1jtatKF+c9xPSqPWIiPj9kBa0gJF0IPBl4NCIuH9e8zta18ysdY7WbYN8//w2Q13HgioifkzJuXszM+s8X7RnZmbWA9zhm5mZ9YBhcUg/h9JMbOejX3OcLRFx32DW0Gq7g62ovrKaJU2MiLHz046jdc3M2ssj/HJj8r9eabeqovq6vWYzs543LEb42a6SvgEsRYqTfYV0S9pKpOS7z+db0a4AlgAeywl45Fja3wAbRcQ4SScDe+X3DoqInYsalLQsKVBmBClsZqKkRUmpequRbks7BLgD2J1029oY4FzgbmAk6Z75Ws1famx3IOuLiGcLal4817wU8FdSEM4iVfZV0X5pYV+NamyjaD4zM+uM4TTCf19E7EC6B/xDwJHAtDxtVUkbkSJoTwd2AUZLWjkvuzVwZ0SMA4iI44BTgFPKOrDsSODaiNgJqN27fkRud0fgUVLE7OPAOFKn/GHSvev9ai5pd77XV1LzesDsPN95pGjeSvuqqL4W91VjG3NxtK6ZWecMpw7/gvy1FjW7DrBXHr2vBaxO6pQPBy4ClmNOBO20iLhqPtp8D1C7p7x20/h6pNQ5gCnAB0gd8qeBX5BG1PeU1Fyk3ev7LTBN0g2kDw0zaG1fza+iNubiaF0zs84ZTof0G6NmHwHujojzJO1B6gQPIx2mvgy4tW7e1wrW9wbpwTDNonCfBNYHbiEdWr8eeIh0xOCm/PUhUtre10jBM6cDnwc2Lqi5X7ttWF+jjUkJe8dLuph0CqCVfVW0X6rsq6I2Sjla18ysvYbTCL/R2cBukiYBRwFPATcCxwG/zvM0e0TrjcDekiYzdzZ9vbOAT+RR61J52jnA+rndtUnn339L6qQfB56PiCf6r6q03YGur9F04IuS7gBWIR2ZaGVfFe2XKvuqqA0zMxskjta1ruRoXTOz1jWL1h3OI3wzMzPL3OGbmZn1AHf4ZmZmPWA4XaXfNSQtDVxN+kB1WkRc3eb1T6AkxlfSDyLi6PlZdrBIOhggIvrK5nG0rplZe7nD74yNgTsi4quD3XCzzt7MzHqXO/w2k/QlUvztMpK2Az4VES8UxPf2i5ptMX52rijhWoxu/QNrcozuVaTgnD8C05ot27AdRRG8XwW2AkYBLwD7ArNItye+P0/bB5hdMG0h0j39y5BCfS4uaPNIUiIfI5Zascmmm5lZq3wOv80i4jTgaKAvIsZGxAv5rbnieymOmp1n/GydKjG665Ly97fL83+rhWWLIngBbssxv88BH8v/RuZpTwIfKZm2N/BEjiEuzA1w0p6ZWed4hD94GuN71wG2lTSWNOpdvWTaAyXrqxKj+wywGTAJOK3FZesjeB8FfpWn12J8HwBGk36HtslHMJYEfk86otA4bXn6xxCbmdkgcYc/eBrje4uiZluJn60SozseOLHgosH5jeAF2JIUIbwJcB3pKYGXRMR/5FMYAaxYMG1NYNO8jk1IzwUo5WhdM7P28iH9oVMUNdvu+Nl7gdMl/VrSJZI2aGHZ6fSP4AXYIo/clwGuBa4BVpN0K3AS6XB90bQrgffnZd8/wO0yM7MWOVp3GJN0BLAf6SK5mcCpA7kdbzBv6XO0rplZ65pF6/qQ/jAWEWeTjhq0a30T2rUuMzMbXD6kb2Zm1gPc4ZuZmfUAH9JvsxyrewkwgnQ1/D4R8VZ+b2XgVxGxScmyExiEc+Rl0b+NsbySxgBExH2drKfIYEXrOkrXzHqFR/jtdwDwvYj4MPAs6da4mlOBxYekqrnVon/H1t+yVxDLOyb/MzOzBZxH+G0WEWfUvVwReB5A0odII/5+MbYN5oq9BV4E+oDVSKl5hwDHk48E1D+IpjG+t2jlZdG/+b36WN6Tgb3y9wdFxM75CMRI0j35tfpeoX9EcFEs78jGaRHxdkNtjtY1M+sQj/A7RNI2wLIRMUXSIsDXgGMrLNoYe3sEKaVvR1Li3aFNlm2M7+2nSfRv43zHAacAp0TEzk3qK4oDLorlLYvqrW/T0bpmZh3iDr8DJC0HnM6czvlY4IyIeKnC4o2xt+sBd+VpU4APNMxff4qgMb63ExrrWwfYKx9dWIsUB1wfyzsOmFEyzczMBokP6bdZHs1fDhwXEbWHxOwCfEjS54Exks6JiMNLVtEYe/sQaeR+U/76ELAC6XQBpMPqtfPwjfG9A/UGKQMfSSqprygOuCiW968F024ua9jRumZm7eURfvsdRsqMP0HSREn7RMQO+fD5WOC+Jp19kXOA9XPc7tqk8/nXAP8k6UekjrRTbgT2ljSZOVn6jYrigKfTP5a3aJqZmQ0SR+taV3K0rplZ65pF67rDt64k6VXS6YJuswLwl6EuokE31gTdWVc31gTdWVc31gTdWVc31bRmRBTe5uRz+NatHin7lDqUJE3ttrq6sSbozrq6sSbozrq6sSbozrq6saYiPodvZmbWA9zhm5mZ9QB3+NatzhrqAkp0Y13dWBN0Z13dWBN0Z13dWBN0Z13dWFM/vmjPzMysB3iEb2Zm1gPc4ZtZW0haTtKuklYY6lrMrD93+NZ1JJ0r6U5JXx3qWgAkrSzptrrXQ1qfpKUlXSfpBklXS1qkC2paFrgW2BK4RdKKQ11TXW0rS7o3fz/kNUlaWNKTOYlzoqQNu6GuuvrOkPTR/P1Q/159rm4/3SfpzKGuKde1rKRfSpoq6cw8bcjrmhd3+NZVJO0NjIiIbYC1JK09xPUsC5wPLNFF9R0AfC8iPkx63PK+XVDTRsA/R8Q3getJT1Ic6ppqTgUW75KfHaR99ZO6uO21u6QuJG0PrBIRP++G/RURP6zbT7cBfxzqmrKDgIvyvffvkvRvXVJXU+7wrduMBS7L398AbDd0pQAwC9gHeCW/HssQ1xcRZ0TEjfnlisCBXVDTrflR0DuQRvnjhromAEkfIj3w6Vm64GeXbQ3sIeluSeeSHq415HVJGkl6NsZ0SR+je/YXklYHVgbW6JKa/gpsIGkZ4N3Ae7qkrqbc4Vu3WQJ4Jn//N9J/8iETEa9ExMt1k7qmPknbAMuSHlg05DXlJyruA7wIxFDXlJ9c+TXS46mhe352vwF2iYgtgZHAbl1S12eA3wHfIX1o+3yX1EWu5Yd0z8/wdmBN4IvA70mPCu+Guppyh2/d5jVg8fz9knTf72hX1CdpOeB04NBuqSmSzwMPANt2QU3HAmdExEv5dVfsJ+CBiPi//P1UUg57N9S1CXBWRDwL/BiY1A11SVoI2AmYSPf8DP8DOCoivgE8DOzfJXU11ZVFWU+7hzmHwzYmPVa3mwx5fXnkejlwXEQ80SU1HSPpM/nlMsApQ10T6VD55yVNBMYAH+2CmgAulLSxpBHAx0mj126o6zFgrfz95sBouqOu7YG7IoXGDPnverYssGH+GW5Fd/y+z5MfnmPd5qfAbZJWIx3q3HqI62nUDfUdBmwKnCDpBOA84KAhruks4DJJhwPTSPtp0lDWFBE71L7Pnf6eDP3PDuAbwMWAgGvojt8pgHOB/5W0L+lUw1jgmi6oaxzpaAN0z746mfT/bk3gTuD7XVJXU07as66Tr4zfFZiUDy92lW6szzVV0401getqRTfWBN1bVz13+GZmZj3A5/DNzMx6gDt8MzOzHuAO38zMrAe4wzczM+sB7vDNzMx6wP8Hne0VsK5AXUcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df = pd.DataFrame(common_words, columns=['desc', 'count'])\n",
    "df.groupby('desc').sum()['count'].sort_values().plot(kind='barh', title='去掉停用词后， 描述酒店的Top20单词')\n",
    "# kine:  bar   barh\n"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "import re\n",
    "# 文本预处理\n",
    "REPLACE_BY_SPACE_RE = re.compile('[/(){}\\[\\]\\|@,;]')\n",
    "BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')\n",
    "STOPWORDS = set(stopwords.words('english'))\n",
    "print(STOPWORDS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>address</th>\n",
       "      <th>desc</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Hilton Garden Seattle Downtown</th>\n",
       "      <td>1821 Boren Avenue, Seattle Washington 98101 USA</td>\n",
       "      <td>Located on the southern tip of Lake Union, the...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sheraton Grand Seattle</th>\n",
       "      <td>1400 6th Avenue, Seattle, Washington 98101 USA</td>\n",
       "      <td>Located in the city's vibrant core, the Sherat...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Crowne Plaza Seattle Downtown</th>\n",
       "      <td>1113 6th Ave, Seattle, WA 98101</td>\n",
       "      <td>Located in the heart of downtown Seattle, the ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kimpton Hotel Monaco Seattle</th>\n",
       "      <td>1101 4th Ave, Seattle, WA98101</td>\n",
       "      <td>What?s near our hotel downtown Seattle locatio...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>The Westin Seattle</th>\n",
       "      <td>1900 5th Avenue, Seattle, Washington 98101 USA</td>\n",
       "      <td>Situated amid incredible shopping and iconic a...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                        address  \\\n",
       "name                                                                              \n",
       "Hilton Garden Seattle Downtown  1821 Boren Avenue, Seattle Washington 98101 USA   \n",
       "Sheraton Grand Seattle           1400 6th Avenue, Seattle, Washington 98101 USA   \n",
       "Crowne Plaza Seattle Downtown                   1113 6th Ave, Seattle, WA 98101   \n",
       "Kimpton Hotel Monaco Seattle                     1101 4th Ave, Seattle, WA98101   \n",
       "The Westin Seattle               1900 5th Avenue, Seattle, Washington 98101 USA   \n",
       "\n",
       "                                                                             desc  \n",
       "name                                                                               \n",
       "Hilton Garden Seattle Downtown  Located on the southern tip of Lake Union, the...  \n",
       "Sheraton Grand Seattle          Located in the city's vibrant core, the Sherat...  \n",
       "Crowne Plaza Seattle Downtown   Located in the heart of downtown Seattle, the ...  \n",
       "Kimpton Hotel Monaco Seattle    What?s near our hotel downtown Seattle locatio...  \n",
       "The Westin Seattle              Situated amid incredible shopping and iconic a...  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 建模\n",
    "df.set_index('name', inplace = True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TFIDF feature names:\n",
      "3278\n"
     ]
    }
   ],
   "source": [
    "# 使用TF-IDF提取文本特征\n",
    "tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0.01, stop_words='english')\n",
    "tfidf_matrix = tf.fit_transform(df['desc'])\n",
    "print('TFIDF feature names:')\n",
    "#print(tf.get_feature_names())\n",
    "print(len(tf.get_feature_names()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1.         0.04631669 0.12699198 ... 0.05650135 0.01119715 0.03172404]\n",
      " [0.04631669 1.         0.08415874 ... 0.05948584 0.02070751 0.04023742]\n",
      " [0.12699198 0.08415874 1.         ... 0.09921478 0.03963904 0.0561999 ]\n",
      " ...\n",
      " [0.05650135 0.05948584 0.09921478 ... 1.         0.06517548 0.04202887]\n",
      " [0.01119715 0.02070751 0.03963904 ... 0.06517548 1.         0.01740597]\n",
      " [0.03172404 0.04023742 0.0561999  ... 0.04202887 0.01740597 1.        ]]\n"
     ]
    }
   ],
   "source": [
    "cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)\n",
    "print(cosine_similarities)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(152, 152)\n"
     ]
    }
   ],
   "source": [
    "print(cosine_similarities.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0               Hilton Garden Seattle Downtown\n",
       "1                       Sheraton Grand Seattle\n",
       "2                Crowne Plaza Seattle Downtown\n",
       "3                Kimpton Hotel Monaco Seattle \n",
       "4                           The Westin Seattle\n",
       "                        ...                   \n",
       "147                  The Halcyon Suite Du Jour\n",
       "148                                Vermont Inn\n",
       "149                 Stay Alfred on Wall Street\n",
       "150         Pike's Place Lux Suites by Barsala\n",
       "151    citizenM Seattle South Lake Union hotel\n",
       "Name: name, Length: 152, dtype: object"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "indices = pd.Series(df.index) #df.index是酒店名称\n",
    "indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recommendations(name, cosine_similarities = cosine_similarities):\n",
    "    recommanded_hotels = []\n",
    "    idx = indices[indices==name].index[0]\n",
    "    print('idx=', idx)\n",
    "    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending = False)\n",
    "    top_10 = list(score_series[1:11].index)\n",
    "    for i in top_10:\n",
    "        recommanded_hotels.append(list(df.index)[i])\n",
    "    return recommanded_hotels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "idx= 49\n",
      "['DoubleTree by Hilton Hotel Seattle Airport', 'Embassy Suites by Hilton Seattle Tacoma International Airport', 'Seattle Airport Marriott', 'Four Points by Sheraton Seattle Airport South', 'Hampton Inn Seattle-Airport', 'Knights Inn Tukwila', 'Homewood Suites by Hilton Seattle-Tacoma Airport/Tukwila', 'Best Western Seattle Airport Hotel', 'Motel 6 Seattle Sea-Tac Airport South', 'Hampton Inn Seattle/Southcenter']\n"
     ]
    }
   ],
   "source": [
    "print(recommendations('Hilton Seattle Airport & Conference Center'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 基于相似度矩阵和指定的酒店name，推荐TOP10酒店\n",
    "def recommendations(name, cosine_similarities = cosine_similarities):\n",
    "    recommended_hotels = []\n",
    "    # 找到想要查询酒店名称的idx\n",
    "    idx = indices[indices == name].index[0]\n",
    "    print('idx=', idx)\n",
    "    # 对于idx酒店的余弦相似度向量按照从大到小进行排序\n",
    "    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending = False)\n",
    "    # 取相似度最大的前10个（除了自己以外）\n",
    "    top_10_indexes = list(score_series.iloc[1:11].index)\n",
    "    # 放到推荐列表中\n",
    "    for i in top_10_indexes:\n",
    "        recommended_hotels.append(list(df.index)[i])\n",
    "    return recommended_hotels\n",
    "print(recommendations('Hilton Seattle Airport & Conference Center'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# 文本预处理\n",
    "REPLACE_BY_SPACE_RE = re.compile('[/(){}\\[\\]\\|@,;]')\n",
    "BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')\n",
    "STOPWORDS = set(stopwords.words('english'))\n",
    "# 对文本进行清洗\n",
    "def clean_text(text):\n",
    "    # 全部小写\n",
    "    text = text.lower()\n",
    "    # 用空格替代一些特殊符号，如标点\n",
    "    text = REPLACE_BY_SPACE_RE.sub(' ', text)\n",
    "    # 移除BAD_SYMBOLS_RE\n",
    "    text = BAD_SYMBOLS_RE.sub('', text)\n",
    "    # 从文本中去掉停用词\n",
    "    text = ' '.join(word for word in text.split() if word not in STOPWORDS) \n",
    "    return text\n",
    "# 对desc字段进行清理\n",
    "df['desc_clean'] = df['desc'].apply(clean_text)\n",
    "#print(df['desc_clean'])\n",
    "\n",
    "# 建模\n",
    "df.set_index('name', inplace = True)\n",
    "# 使用TF-IDF提取文本特征\n",
    "tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0.01, stop_words='english')\n",
    "\n",
    "tfidf_matrix = tf.fit_transform(df['desc_clean'])\n",
    "print('TFIDF feature names:')\n",
    "#print(tf.get_feature_names())\n",
    "print(len(tf.get_feature_names()))\n",
    "#print('tfidf_matrix:')\n",
    "#print(tfidf_matrix)\n",
    "#print(tfidf_matrix.shape)\n",
    "# 计算酒店之间的余弦相似度（线性核函数）\n",
    "cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)\n",
    "#print(cosine_similarities)\n",
    "#print(cosine_similarities.shape)\n",
    "indices = pd.Series(df.index) #df.index是酒店名称\n",
    "\n",
    "# 基于相似度矩阵和指定的酒店name，推荐TOP10酒店\n",
    "def recommendations(name, cosine_similarities = cosine_similarities):\n",
    "    recommended_hotels = []\n",
    "    # 找到想要查询酒店名称的idx\n",
    "    idx = indices[indices == name].index[0]\n",
    "    print('idx=', idx)\n",
    "    # 对于idx酒店的余弦相似度向量按照从大到小进行排序\n",
    "    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending = False)\n",
    "    # 取相似度最大的前10个（除了自己以外）\n",
    "    top_10_indexes = list(score_series.iloc[1:11].index)\n",
    "    # 放到推荐列表中\n",
    "    for i in top_10_indexes:\n",
    "        recommended_hotels.append(list(df.index)[i])\n",
    "    return recommended_hotels\n",
    "print(recommendations('Hilton Seattle Airport & Conference Center'))\n",
    "print(recommendations('The Bacon Mansion Bed and Breakfast'))\n",
    "#print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
